import requests
from bs4 import BeautifulSoup
def craw_main(url, timeout=10):
    """Fetch a movie listing page and print one record per list entry.

    The page is expected to contain an ``<ol class="grid_view">`` whose
    ``<li>`` children each hold the rank (``<em>``), the title
    (``<span class="title">``), the rating (``<span class="rating_num">``)
    and a ``<p class="">`` description line (e.g. a Douban Top 250 page).

    For every entry the function prints ``rank,title,rating``, a CRLF, the
    cleaned description line, and another CRLF.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled
            connection.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page does not contain the expected listing.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'}

    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "lxml")

    listing = soup.find("ol", class_="grid_view")
    if listing is None:
        raise ValueError(
            "no <ol class=\"grid_view\"> listing found in page: %s" % url
        )

    # Characters removed from the description line: space, newline,
    # non-breaking space and a few accented letters.
    # NOTE(review): the accented letters are presumably dropped to avoid
    # console encoding errors -- confirm before changing this set.
    strip_table = str.maketrans("", "", " \n\xa0\xee\xf6\u0161\xf4")

    for item in listing.find_all("li"):
        # Rank number
        rank_tag = item.find('em', class_='')
        # Title
        title_tag = item.find('span', class_='title')
        # Rating
        score_tag = item.find('span', class_='rating_num')
        # Director / description line
        director_tag = item.find('p', class_='')

        # Skip entries that lack any expected field rather than crashing
        # on ``None.get_text()``.
        if any(
            tag is None
            for tag in (rank_tag, title_tag, score_tag, director_tag)
        ):
            continue

        num = rank_tag.get_text()
        title = title_tag.get_text()
        score = score_tag.get_text()
        director = director_tag.get_text().translate(strip_table)

        print(f"{num},{title},{score}\r\n{director}\r\n")

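# Example driver (currently disabled): calls craw_main once per listing page,
# using start offsets 0, 25, ..., 125.
# if __name__ == "__main__":
#     page=0
#     while page<=125:
#         url='https://movie.douban.com/top250?start=%d&filter='%page
#         craw_main(url)
#         page += 25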

